In [1]:
import plotly.offline as pyo

from plotly.graph_objs import *

import chart_studio.plotly as py

import pandas as pd
from pandas import DataFrame
In [2]:
# Initialise plotly's offline notebook mode so that pyo.iplot can render inline.
# Called through the package-level export (the same way pyo.iplot is called
# later) instead of reaching into the plotly.offline.offline submodule.
pyo.init_notebook_mode()
In [3]:
from scipy import stats
In [4]:
def buildHoverText(row):
    """Return the HTML hover label for one row of the life-expectancy table.

    The label shows the country in bold, the life expectancy at 60 for the
    row's sex, and the cigarette price formatted to two decimal places.
    """
    template = "<b>{}</b><br>Life expectancy for {}s at 60: {} years<br>Price of cigarettes: ${:.2f}"
    return template.format(row['Country'],
                           row['Sex'],
                           row['Years'],
                           float(row['Most sold cigarette brand (US$)']))


# The first CSV column is used as the frame's index.
lifeExpectancy = pd.read_csv(r"../Data/LifeExpectancyCigarettePrices.csv", index_col=0)

# Pre-compute one hover label per row; the scatter traces display it on hover.
lifeExpectancy['text'] = lifeExpectancy.apply(buildHoverText, axis=1)
In [5]:
# Distinct category values, in order of first appearance in the data.
regions = list(lifeExpectancy['Region'].unique())
sexes = list(lifeExpectancy['Sex'].unique())

# Styling lookup keyed by category value: each region maps to a marker symbol
# and each sex maps to a marker colour.
markerLookup = {
    **{regionName: {'symbol' : symbolName}
       for regionName, symbolName in (('Eastern Mediterranean', 'circle'),
                                      ('Europe', 'square'),
                                      ('Africa', 'diamond'),
                                      ('Americas', 'triangle-up'),
                                      ('Western Pacific', 'cross'),
                                      ('South-East Asia', 'x'))},
    **{sexName: {'color' : hexColour}
       for sexName, hexColour in (('Male', '#663399'),
                                  ('Female', '#FF6347'))},
}
In [6]:
# Build one marker trace per (sex, region) combination: colour encodes the sex,
# marker symbol encodes the region, and traces for the same region share a
# legend group.
traces = []
for sex in sexes:
    sexMatches = lifeExpectancy['Sex'] == sex
    for reg in regions:
        # Rows belonging to this region/sex pair; selected once and reused
        # for the x, y and hover-text series.
        subset = lifeExpectancy.loc[(lifeExpectancy['Region'] == reg) & sexMatches]
        markerStyle = {'color' : markerLookup[sex]['color'],
                       'symbol' : markerLookup[reg]['symbol'],
                       'opacity' : 0.7}
        trace = {'type' : 'scatter',
                 'mode' : 'markers',
                 'x' : subset['Most sold cigarette brand (US$)'],
                 'y' : subset['Years'],
                 'text' : subset['text'],
                 'legendgroup' : reg,
                 'hoverinfo' : 'text',
                 'marker' : markerStyle,
                 'name' : "{} {}s".format(reg, sex)}
        traces.append(trace)
In [7]:
# Figure layout. Both axis ranges are derived from the data: the x-axis starts
# at 0 and extends 5% past the highest price; the y-axis runs from 90% of the
# lowest to 105% of the highest life expectancy.
layout = {'title' : 'Life Expectancy Against Price of Most Popular Brand of Cigarettes (2011)',
         'xaxis' : {'title' : 'Price of most popular brand of cigarettes',
                    'range' : [0, 
                               lifeExpectancy['Most sold cigarette brand (US$)'].max() * 1.05],
                    # tickformat expects a d3-format specifier; the previous
                    # value "${:}" is a Python-style placeholder, not a valid
                    # specifier. The dollar sign is added with tickprefix.
                    'tickprefix' : '$'},
         'yaxis' : {'title' : 'Life expectancy at age 60 (years)',
                    'range' : [lifeExpectancy['Years'].min()*0.9, 
                              lifeExpectancy['Years'].max()*1.05],},
         # Hover shows the single nearest point.
         'hovermode' : 'closest'}

# Render the scatter plot inline.
fig = Figure(data=traces, layout=layout)
pyo.iplot(fig)
In [8]:
# Ordinary least-squares fit of life expectancy (y) against cigarette price (x)
# over every row of the table.
cigarettePrices = lifeExpectancy['Most sold cigarette brand (US$)']
yearsAtSixty = lifeExpectancy['Years']
slope, intercept, r_value, p_value, std_err = stats.linregress(cigarettePrices, yearsAtSixty)
In [9]:
# Display the fitted slope and intercept returned by stats.linregress.
slope, intercept
Out[9]:
(0.8372550180526643, 17.198488595559095)
In [10]:
# Display the squared r_value (shown as R^2 in the plot annotation), together
# with the p-value and standard error returned by stats.linregress.
r_value**2, p_value, std_err
Out[10]:
(0.3135512088415398, 1.9844110607238766e-31, 0.06511084122643646)
In [11]:
# The fitted line only needs its two end points: x = 0 and the highest
# cigarette price in the data.
xValRange = [0, lifeExpectancy['Most sold cigarette brand (US$)'].max()]

# Evaluate y = slope * x + intercept at each end point.
line = [slope * xVal + intercept for xVal in xValRange]
line
Out[11]:
[17.198488595559095, 28.333980335659533]
In [12]:
# Regression line drawn between the two fitted end points (xValRange, line).
# It is hidden from the legend and from hover so that it only acts as a guide.
regressionTrace = {'type' : 'scatter',
                   'mode' : 'lines',
                   # The name is never shown (no legend entry, no hover); it
                   # only identifies this trace when the cell is re-run.
                   'name' : 'Regression line',
                   'x' : xValRange,
                   'y' : line,
                   # A lines-only trace is coloured through its line
                   # attribute, not through marker.
                   'line' : {'color' : '#333'},
                   'hoverinfo' : 'none',
                   'showlegend' : False}

# Replace any regression line left behind by an earlier run of this cell
# instead of blindly appending, so re-running the cell does not stack
# duplicate lines. The list is updated in place.
traces[:] = [t for t in traces if t.get('name') != regressionTrace['name']] + [regressionTrace]
In [13]:
# Rebuild the figure from the current traces list, which now also holds the
# regression line, and render it inline.
fig = Figure(data=traces, layout=layout)
pyo.iplot(fig)
In [14]:
# Text annotation showing the fitted equation and R^2.
# The sign of the intercept is rendered separately so that a negative
# intercept reads "y = ax - b" instead of "y = ax + -b".
interceptSign = '+' if intercept >= 0 else '-'
equationAnnotation = {'text' : "y = {:.2f}x {} {:.2f}<br>R<sup>2</sup> = {:.2f}".format(slope,
                                                                                    interceptSign,
                                                                                    abs(intercept),
                                                                                    r_value**2),
                      # Positioned in data coordinates (xref/yref point at the
                      # plot's x and y axes); the location is hand-picked.
                      'xref' : 'x',
                      'yref' : 'y',
                      'x' : 10,
                      'y' : 28,
                      'showarrow' : False}
In [15]:
# Attach the equation annotation to the shared layout, then rebuild and
# render the figure so the annotation appears on the plot.
layout.update({'annotations' : [equationAnnotation]})
fig = Figure(data=traces, layout=layout)
pyo.iplot(fig)
In [ ]: